*****************************************************************************
* STATISTIC MAIN TABLE
*****************************************************************************
* Load the main regression data set.
use "${output_stata}\main_regression.dta", clear

* Attach the shift data, keyed on sirtg and year, and discard rows that exist
* only in the shift file. The separator after the input_stata global is written
* explicitly here, matching the distance merge below.
merge m:1 sirtg year using "${input_stata}\shift.dta"
drop if _merge == 2
drop _merge

* The distance file is keyed on apgr / apgr_b / year, so the two sector
* variables are renamed for the merge and restored right after it.
rename apgr_1_num apgr
rename code_entry_num apgr_b
merge m:1 apgr apgr_b year using "${input_stata}\distance_bvrs_agg"
* NOTE(review): rows found only in the distance file are kept here, unlike the
* shift merge above - confirm this is intended.
drop _merge
rename apgr apgr_1_num
rename apgr_b code_entry_num

************************************************

* Convert the overlap measures into distances (1 - overlap).
replace HC_overlap_mean = 1 - HC_overlap_mean
replace HC_overlap_weight_ca = 1 - HC_overlap_weight_ca
replace HC_overlap_weight_eff = 1 - HC_overlap_weight_eff
replace HC_overlap_p5 = 1 - HC_overlap_p5

eststo clear

* Variable labels are LaTeX fragments. The subscript is written with an escaped
* underscore, which the esttab exports turn back into a plain underscore through
* their substitute() option.
gen HC_distance_s_brut_t1b = HC_distance_s_brut_t1 // copy of the baseline measure
label var buy "1(Buy)$\_{\textit{g,n,t}}$"
* The closing dollar sign now sits right after the subscript, as in the labels
* below, so the parenthesised text is no longer typeset in math mode.
label var HC_distance_s_brut_t1b "HC Distance$\_{\textit{g,n,t-1}}$ (wages - baseline)"
label var HC_overlap_mean "HC Distance$\_{\textit{g,n,t-1}}$ (simple average)"
label var HC_overlap_weight_ca "HC Distance$\_{\textit{g,n,t-1}}$ (weighted average: sales)"
label var HC_overlap_weight_eff "HC Distance$\_{\textit{g,n,t-1}}$ (weighted average: employment)"
label variable listed_gr "1(Public)$\_{\textit{g,t-1}}$"
label variable any_exit "Sales shift (100\%)$\_{\textit{g,t-1,t}}$"
label var diversifie "1(Diversified)$\_{\textit{g,t-1}}$"
label var same1 "1(Same 1-digit Industry)$\_{\textit{o,n}}$"
label var same2 "1(Same 2-digit Industry)$\_{\textit{o,n}}$"
* Percent signs are escaped, as in the any_exit label above: a bare percent sign
* starts a LaTeX comment and would cut the exported table row short.
label var upstream05 "1(Upstream link >5\%)$\_{\textit{o,n}}$"
label var upstream10 "1(Upstream link >10\%)$\_{\textit{o,n}}$"
label var downstream05 "1(Downstream link >5\%)$\_{\textit{o,n}}$"
label var downstream10 "1(Downstream link >10\%)$\_{\textit{o,n}}$"
label var age "Age$\_{\textit{g,t-1}}$"

gen log_inv = log(1+investment)
label variable log_inv "log(1+Investment)$\_{\textit{g,t}}$"
label variable capint_agg "Capital intensity$\_{\textit{n,t}}$"
label var entree_dep "1(New department)$\_{\textit{g,t}}$"
label var entree_reg "1(New region)$\_{\textit{g,t}}$"
rename distance distance_bvrs_agg // name used by the summary table
label var distance_bvrs_agg "Product market Distance$\_{\textit{o,n,t-1}}$"
label variable survival "New firm survival$\_{\textit{n,t-1}}$"
label variable g_secteur_sales "Sales growth$\_{\textit{n,t-1}}$"
gen hhi = herfindahl // copy of the Herfindahl variable under a shorter name
label variable hhi "Herfindahl index$\_{\textit{n,t-1}}$"

* Define the sample: observations used by the baseline regression, restricted
* to a non-missing share above 1%.
reghdfe buy HC_distance_s_brut_t1 ///
    vaj_eff_t1 diversity_t1 size_t1 immo_eff_t1 sal_eff_t1 tresact_eff_t1 ///
    if share > 0.01 & share < ., absorb(orig_dest_year) cluster(apgr_1_num code_entry_num)
gen sample = e(sample)

* Years elapsed since year_init_t0.
gen delta_year = year - year_init_t0
su delta_year, d

* Number of in-sample buy events per sirtg. The total is missing outside the sample.
bys sirtg : egen sum_External = total(buy) if sample
* Stata treats missing as larger than any number, so the explicit missing check
* keeps out-of-sample rows from being flagged as serial acquirers.
gen serial_acquirer = sum_External > 1 & !missing(sum_External)
label variable serial_acquirer "Serial acquirer$\_{\textit{g}}$"

************
* Summary statistics - Org Capital
************
* Express the number of new firms in thousands.
replace nombre = nombre / 1000
label variable nombre "N. New firms$\_{\textit{n,t-1}}$ (thousands)"

* Rescale capital intensity by 1000.
replace capint_agg = capint_agg / 1000

* Cumulate the components: mkting_t1 becomes admin + mkting, and info_t1 becomes
* admin + mkting + info. A former self-assignment of admin_t1 was a no-op and
* has been removed.
replace mkting_t1 = admin_t1 + mkting_t1
replace info_t1 = mkting_t1 + info_t1
* Indicator for a strictly positive hr_t1. Missing values compare as larger than
* any number in Stata, so they are excluded explicitly.
* NOTE(review): the dummy is built from hr_t1 while the label below is attached
* to rh_t1 - confirm both variables exist and are the intended ones.
gen hr_dummy_t1 = hr_t1 > 0 & !missing(hr_t1)
label variable rh_t1 "Top layers$\_{\textit{g,t-1}}$"
label variable mgmt_t1 "SG$\&$A$\_{\textit{g,t-1}}$"
label variable hr_dummy_t1 "1(HR Workers)$\_{\textit{g,t-1}}$"

label variable diversity_t1 "N. Occupations/N. Employees$\_{\textit{g,t-1}}$"

* Detailed summary statistics over the regression sample. Every physical line of
* the command must end with a continuation marker, otherwise the variable list
* is parsed as a separate command.
eststo clear
eststo summa: quietly estpost summ ///
    buy  HC_distance_s_brut_t1 HC_distance_nombre_t1 HC_distance_nbheur_t1 ///
    HC_distance_s_brut_t0  HC_overlap_mean  HC_overlap_weight_eff HC_overlap_weight_ca ///
    size_t1 diversity_t1 vaj_eff_t1 immo_eff_t1 sal_eff_t1 tresact_eff_t1 ///
    any_exit  listed_gr serial_acquirer diversifie age log_inv ///
    capint_agg entree_dep entree_reg distance_bvrs_agg ///
    same1 same2 upstream05 upstream10 downstream05 downstream10 ///
    nombre survival g_secteur_sales hhi rh_t1 mgmt_t1 hr_dummy_t1 ///
    if sample, d

* On-screen preview of the table.
esttab summa, ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    nonumber label fragment

* LaTeX export. substitute() turns the escaped underscores of the labels back
* into plain underscores.
esttab summa using "${export}\stat_summ.tex", ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    star(* 0.10 ** 0.05 *** 0.01) substitute(\_ _) ///
    nomtitle noobs nonumber fragment booktabs label mlabels(none) collabels(none) noline replace

******************************************************************
* LLM TIGHTNESS
******************************************************************
* Start again from the main regression file for the LLM tightness statistics.
use "${output_stata}\main_regression.dta", clear

* Version: December 2020
* Bring in the tightness data and keep matched rows only.
merge 1:m sirtg year code_entry using "${input_stata}\llm_tightness_agg"
keep if _merge == 3
drop if tightness_max == .

* Same share filter as the main sample: non-missing share above 1%.
keep if share > 0.01 & share < .

* The regression only serves to flag the estimation sample.
reghdfe buy HC_distance_s_brut_t1 ///
    vaj_eff_t1 diversity_t1 size_t1 immo_eff_t1 sal_eff_t1 tresact_eff_t1 ///
    , absorb(orig_dest_year codcom_root) cluster(apgr_1_num code_entry_num)
gen sample = e(sample)

label variable tightness_max "LLM tightness$\_{\textit{z,n,t-1}}$"

* Detailed summary of tightness over the estimation sample.
eststo clear
eststo summa: quietly estpost summarize ///
    tightness_max ///
    if sample, detail

* On-screen preview.
esttab summa, ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    nonumber label fragment

* LaTeX export.
esttab summa using "${export}\stat_summ_llm.tex", ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    star(* 0.10 ** 0.05 *** 0.01) substitute(\_ _) ///
    nomtitle noobs nonumber fragment booktabs label mlabels(none) collabels(none) noline replace

*****************************************************************************
* EXTENSIVE MARGIN
*****************************************************************************
* Load the extensive-margin data set.
clear
use "${output_stata}\ext_regression.dta"

* Rebuild the diversification dummy: 1 when entree is neither "NA" nor empty.
capture drop diversify
tab entree, missing // inspect the values of entree, including missing ones
gen diversify = entree != "NA" & entree != ""

set more off
eststo clear
summ share, d

* Define the sample: observations used by the extensive-margin regression.
reghdfe diversify HC_distance_s_brut_t1 ///
    diversity size immo_eff sal_eff tresact_eff vaj_eff ///
    , absorb(firm_year orig_dest_year) cluster(sirtg_num apgr_1_num code_entry_num)
gen sample = e(sample)

label var diversify "1(Diversification)$\_{\textit{g,n,t-1}}$"

* Detailed summary of the dummy over the sample. The continuation marker after
* summ is required, otherwise the variable name is parsed as a separate command.
eststo clear
eststo summa: quietly estpost summ ///
    diversify ///
    if sample, d

* On-screen preview.
esttab summa, ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    nonumber label fragment

* LaTeX export. The option list is continued explicitly after the comma.
esttab summa using "${export}\stat_summ_ext.tex", ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    star(* 0.10 ** 0.05 *** 0.01) substitute(\_ _) ///
    nomtitle noobs nonumber fragment booktabs label mlabels(none) collabels(none) noline replace

*****************************************************************************
* SIREN ENTRY
*****************************************************************************
* Load the SIREN entry data.
clear
use "${input_stata}\siren_entry.dta"

* Keep entries whose sales_entry / l1_cagr ratio is non-missing and above 1%,
* and whose entree value is "int_const".
keep if sales_entry / l1_cagr > 0.01 & sales_entry / l1_cagr < .
keep if entree == "int_const"

* Keep only (sirtg, code, year) cells that appear exactly once.
destring code, replace force
bys sirtg code year: gen dup = _N
su dup, d
keep if dup == 1

* Attach the HC data; matched rows only.
merge 1:m sirtg code year using "${input_stata}\marche_f_HC_t1_sirennombre.dta"
keep if _merge == 3
drop _merge

* Attach the siren-level controls; matched rows only. The separator after the
* input_stata global is written explicitly, as in the merges above.
merge 1:1 sirtg code year siren using "${input_stata}\ctrl_var_siren_t1.dta"
keep if _merge == 3
drop _merge

* Controls scaled by eff_3112_t1.
gen vaj_eff_t1 = vaj_t1 / (eff_3112_t1 * 1000)
gen diversity_t1 = nb_pcs_t1 / eff_3112_t1
gen size_t1 = log(eff_3112_t1)
gen immo_eff_t1 = immocorp_t1 / (eff_3112_t1 * 1000)
gen sal_eff_t1 = sum_s_brut_t1 / eff_3112_t1
gen tresact_eff_t1 = tresact_t1 / (eff_3112_t1 * 1000)
gen siren_target = siren == siren_entry // 1 when this siren is the entering one
gen HC_distance = 1 - HC_overlap // distance is one minus overlap

egen id = group(sirtg code year)
egen orig_dest_year = group(apgr_1 code year)

* The regression defines e(sample) for the summary below.
reghdfe siren_target HC_distance ///
    vaj_eff_t1 diversity_t1 size_t1  immo_eff_t1 sal_eff_t1 tresact_eff_t1, absorb(id) cluster(id)

label var siren_target "1(Diversifying Subsidiary)$\_{\textit{f,n,t}}$"

eststo clear
eststo summa: quietly estpost summ ///
    siren_target ///
    if e(sample), d

* On-screen preview.
esttab summa, ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    nonumber label fragment

* LaTeX export.
esttab summa using "${export}\stat_summ_subs.tex", ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    star(* 0.10 ** 0.05 *** 0.01) substitute(\_ _) ///
    nomtitle noobs nonumber fragment booktabs label mlabels(none) collabels(none) noline replace

******************************************************************
* PERFORMANCE - DYNAMICS
******************************************************************
* Load the corrected performance data.
clear
use "${output_stata}\reg_perf_corrig.dta"

* Restrict to external and int_const entries, then rebuild share as
* s001_4 / l1_cagr.
keep if entree == "external" | entree == "int_const"
drop share
gen share = s001_4 / l1_cagr
* External entries are kept whatever their share; other entries need a
* non-missing share above 1%.
keep if (share > 0.01 & share < .) | entree == "external"

gen q_HC2_gp = q_HC2
gen log_eff_4 = log(eff_4)

* Employment regression on int_const entries with delta_b equal to 3.
reghdfe log_eff  HC_distance_t1  vaj_eff_t1 diversity_t1 ///
    size_t1 immo_eff_t1 sal_eff_t1 tresact_eff_t1 if delta_b == 3 & entree == "int_const", ///
    absorb(orig_dest_year)
estimates store log_eff

* Detailed summary of log_eff over the estimation sample.
eststo clear
eststo summa: quietly estpost summarize ///
    log_eff ///
    if e(sample) & delta_b == 3, detail

label variable log_eff "log(Employment)$\_{\textit{f,t}}$"

* On-screen preview.
esttab summa, ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    nonumber label fragment

* LaTeX export.
esttab summa using "${export}\stat_summ_log_eff.tex", ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    star(* 0.10 ** 0.05 *** 0.01) substitute(\_ _) ///
    nomtitle noobs nonumber fragment booktabs label mlabels(none) collabels(none) noline replace

* Replace the stored log_sales by the log of s001_4.
drop log_sales
gen log_sales_4 = log(s001_4)

* Sales regression on observations with delta_b equal to 3.
reghdfe log_sales_4  HC_distance_t1  vaj_eff_t1 diversity_t1 ///
    size_t1 immo_eff_t1 sal_eff_t1 tresact_eff_t1 if delta_b == 3, ///
    absorb(orig_dest_year) cluster(code_entry_num apgr_1)
estimates store log_sales

* Summarise the regressand under its full name. log_sales itself was dropped
* above, so the short name only resolved through variable abbreviation and
* failed when abbreviation was off or ambiguous.
eststo clear
eststo summa: quietly estpost summ ///
    log_sales_4 ///
    if e(sample), d

label var log_sales_4 "log(Sales)$\_{\textit{g,n,t}}$"

* On-screen preview.
esttab summa, ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    nonumber label fragment

* LaTeX export. The option list is continued explicitly after the comma.
esttab summa using "${export}\stat_summ_log_sales.tex", ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    star(* 0.10 ** 0.05 *** 0.01) substitute(\_ _) ///
    nomtitle noobs nonumber fragment booktabs label mlabels(none) collabels(none) noline replace

******************************************************************
* PERFORMANCE - REGRESSIONS
******************************************************************
* Reload the corrected performance data.
clear
use "${output_stata}\reg_perf_corrig.dta"

keep if entree == "external" | entree == "int_const"

* Survival indicators: strictly positive sales in the listed periods.
* Stata treats missing as larger than any number, so a missing sales figure
* would otherwise count as survival; missing values are excluded explicitly.
* NOTE(review): if missing sales should instead leave the indicator missing,
* switch to an if-qualifier - confirm against how the s001 variables are built.
gen survival_5  = s001_5 > 0 & !missing(s001_5)
gen survival_6  = s001_5 > 0 & s001_6 > 0 & !missing(s001_5, s001_6)
gen survival_7  = s001_5 > 0 & s001_6 > 0 & s001_7 > 0 & !missing(s001_5, s001_6, s001_7)
gen survival_7b = s001_7 > 0 & !missing(s001_7)

* Sales: growth rates relative to s001_4, and logs.
gen g_sales_4_7 = (s001_7 - s001_4) / s001_4
gen g_sales_4_5 = (s001_5 - s001_4) / s001_4
gen log_sales_4_7 = log(s001_7 / s001_4)
gen log_sales_4 = log(s001_4)
gen log_sales_7 = log(s001_7)

* Employment: growth rates (g_) relative to the initial value.
gen g_eff_4_7  = (eff_7 - eff_4) / eff_4
gen g_eff_3_7  = (eff_7 - eff_3) / eff_3
gen g_eff_3_4  = (eff_4 - eff_3) / eff_3
gen g_eff_4_5  = (eff_5 - eff_4) / eff_4
* Employment: symmetric changes (d_), twice the difference over the sum.
gen d_eff_3_4  = 2 * (eff_4 - eff_3) / (eff_3 + eff_4)
gen d_eff_3_7  = 2 * (eff_7 - eff_3) / (eff_3 + eff_7)

* Growth of the HC distance variable.
gen g_HC_distance_3_7  = (HC_dist_var_7 - HC_dist_var_3) / HC_dist_var_3
gen g_HC_distance_4_7  = (HC_dist_var_7 - HC_dist_var_4) / HC_dist_var_4
gen g_HC_distance_3_5  = (HC_dist_var_5 - HC_dist_var_3) / HC_dist_var_3
gen g_HC_distance_4_5  = (HC_dist_var_5 - HC_dist_var_4) / HC_dist_var_4

* Changes in top5 and in the remainder (eff minus top5), periods 3 to 7.
gen d_top5_3_7  = 2 * (top5_7 - top5_3) / (eff_3 + eff_7)
gen d_nottop5_3_7  = 2 * ((eff_7 - top5_7) - (eff_3 - top5_3)) / (eff_7 + eff_3)
* Scaled by initial total employment.
gen g_top5_3_7  = (top5_7 - top5_3) / (eff_3)
gen g_nottop5_3_7  = ((eff_7 - top5_7) - (eff_3 - top5_3)) / (eff_3)
* Scaled by the initial level of the same category.
gen g_top5_3_7b  = (top5_7 - top5_3) / (top5_3)
gen g_nottop5_3_7b  = (not_top5_7 - not_top5_3) / (not_top5_3)

* Same symmetric changes for periods 3 to 6.
gen d_top5_3_6  = 2 * (top5_6 - top5_3) / (eff_3 + eff_6)
gen d_nottop5_3_6  = 2 * ((eff_6 - top5_6) - (eff_3 - top5_3)) / (eff_6 + eff_3)
gen d_eff_3_6  = 2 * (eff_6 - eff_3) / (eff_3 + eff_6)

* Regression on observations with delta_b equal to 3. It defines the sample used
* by the summary table below.
reghdfe g_HC_distance_4_7 g_eff_3_7  g_top5_3_7 g_nottop5_3_7 HC_distance_t1  vaj_eff_t1 diversity_t1 ///
    size_t1 immo_eff_t1 sal_eff_t1 tresact_eff_t1 if delta_b == 3 , absorb(orig_dest_year) cluster(code_entry_num apgr_1)
gen sample = e(sample)

* Detailed summary of the performance indicators over the estimation sample.
eststo clear
eststo summa: quietly estpost summarize ///
    g_eff_3_7  g_top5_3_7 g_nottop5_3_7 g_HC_distance_4_7  survival_7 ///
    if e(sample), detail

* Labels are attached before the tables are produced.
label variable g_eff_3_7  "$\Delta$ Employment$\_{\textit{f,t-1,t+3}}$"
label variable g_top5_3_7 "$\Delta$ Top 5 occupations$\_{\textit{f,t-1,t+3}}$"
label variable g_nottop5_3_7 "$\Delta$ Other occupations$\_{\textit{f,t-1,t+3}}$"
label variable g_HC_distance_4_7 "$\Delta$ HC Distance$\_{\textit{f,t,t+3}}$"
label variable survival_7 "Survival$\_{\textit{g,n,t+3}}$"

* On-screen preview.
esttab summa, ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    nonumber label fragment

* LaTeX export.
esttab summa using "${export}\stat_summ_perf.tex", ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    star(* 0.10 ** 0.05 *** 0.01) substitute(\_ _) ///
    nomtitle noobs nonumber fragment booktabs label mlabels(none) collabels(none) noline replace

******************************************************************
* SECTOR LEVEL ANALYSIS
******************************************************************
* Load the aggregated build/buy data.
clear all
use "${output_stata}\build_buy_agg.dta"

* Discard rows where code and code_b coincide.
drop if code == code_b

* Fixed-effect identifiers: code by year and code_b by year.
egen origin_year = group(code year)
egen entry_year = group(code_b year)

* log(1 + x) transforms of the entry counts and entry sales.
gen log_nb_entree = log(1 + nombre_entree)
gen log_nb_buy = log(1 + nombre_buy)
gen log_nb_build = log(1 + nombre_build)
gen log_sales_entree = log(1 + sales_entree)
gen log_sales_buy = log(1 + sales_buy)
gen log_sales_build = log(1 + sales_build)

* Buy share of entries, in numbers and in sales.
gen nb_buy_build = nombre_buy / nombre_entree
gen sales_buy_build = sales_buy / sales_entree
gen log_nb_entree0 = log(1 + nombre_entree0)

* Entry regression; it defines the sample for the summary table.
reghdfe log_nb_entree log_nb_entree0 distance_HC vlink bvrs_distance same2, ///
    absorb(origin_year entry_year) cluster(code code_b)
gen sample = e(sample)

eststo clear
eststo summa: quietly estpost summarize ///
    distance_HC log_nb_entree log_nb_build  log_nb_buy ///
    if e(sample), detail

label variable distance_HC  "HC Distance$\_{\textit{o,n,t-1}}$"
label variable log_nb_entree "log(1+Entries)$\_{\textit{o,n,t}}$"
label variable log_nb_build "log(1+Build)$\_{\textit{o,n,t}}$"
label variable log_nb_buy "log(1+Buy)$\_{\textit{o,n,t}}$"

* On-screen preview.
esttab summa, ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    nonumber label fragment

* LaTeX export.
esttab summa using "${export}\stat_summ_sector_lvl1.tex", ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    star(* 0.10 ** 0.05 *** 0.01) substitute(\_ _) ///
    nomtitle noobs nonumber fragment booktabs label mlabels(none) collabels(none) noline replace

* Buy-frequency regression, restricted to cells of the previous sample with a
* positive nombre_buy.
reghdfe nb_buy_build distance_HC vlink bvrs_distance same2 if nombre_buy > 0 & sample, ///
    absorb(origin_year entry_year) cluster(code code_b)

label variable nb_buy_build "Buy frequency$\_{\textit{o,n,t}}$"

* Detailed summary of the buy frequency over the estimation sample.
eststo clear
eststo summa: quietly estpost summarize ///
    nb_buy_build  ///
    if e(sample), detail

* On-screen preview.
esttab summa, ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    nonumber label fragment

* LaTeX export.
esttab summa using "${export}\stat_summ_sector_lvl2.tex", ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    star(* 0.10 ** 0.05 *** 0.01) substitute(\_ _) ///
    nomtitle noobs nonumber fragment booktabs label mlabels(none) collabels(none) noline replace

*****************************************************************************
* RATIOS WITH DIFFERENT LEVELS OF AGGREGATION
*****************************************************************************
* For each of the five marche_f files, print the buy share of entries, both in
* numbers and weighted by sales.
forvalues i = 1/5 {

    use "${input_stata}\marche_f_`i'.dta", clear

    * Rows with entree equal to "NA" are not entries.
    drop if entree == "NA"
    gen buy = entree == "external"
    gen Internal = (buy == 0)
    gen External = (buy == 1)
    gen sales_Internal = sales * Internal
    gen sales_External = sales * External

    * Totals over the whole file, then the two ratios in percent.
    collapse (sum) Internal External sales_Internal sales_External
    gen ratio = External / (External + Internal) * 100
    gen ratio_weighted = sales_External / (sales_External + sales_Internal) * 100

    summarize Internal External ratio ratio_weighted sales*
}

*****************************************************************************
* STATISTIC RATIOS
*****************************************************************************
* Yearly build/buy counts and buy ratios from the level-3 file.
use "${input_stata}\marche_f_3.dta", clear

* Rows with entree equal to "NA" are not entries.
drop if entree == "NA"
gen buy = entree == "external"
gen Internal = (buy == 0)
gen External = (buy == 1)
gen sales_Internal = sales * Internal
gen sales_External = sales * External

* Yearly totals, then the two ratios in percent.
collapse (sum) Internal External sales_Internal sales_External, by(year)
gen ratio = External / (External + Internal) * 100
gen ratio_weighted = sales_External / (sales_External + sales_Internal) * 100

label variable Internal "Build (number)"
label variable External "Buy (number)"
label variable ratio "Buy (\%, equally-weighted)"
label variable ratio_weighted "Buy (\%, entry sales-weighted)"

* Table 1: counts by year.
eststo debut: estpost tabstat Internal External, ///
    by(year) statistics(sum) ///
    columns(statistics) listwise
esttab debut using "Latex\tables\ratio1.tex", main(mean) cells("sum(fmt(%15.0fc))") replace ///
    label nostar unstack nogaps noobs nonote nomtitle nonumber fragment not mlabels(none) collabels(none) nonumber

* Table 2: ratios by year.
eststo debut: estpost tabstat ratio ratio_weighted, ///
    by(year) statistics(mean) ///
    columns(statistics) listwise
esttab debut using "Latex\tables\ratio2.tex", main(mean) cells("mean(fmt(%15.2fc))") replace ///
    label nostar unstack nogaps noobs nonote nomtitle nonumber fragment not mlabels(none) collabels(none) nonumber

* Overall totals across years, with the ratios recomputed on the totals.
collapse (sum) Internal External sales_Internal sales_External
gen ratio = External / (External + Internal) * 100
gen ratio_weighted = sales_External / (sales_External + sales_Internal) * 100

label variable Internal "Build (number)"
label variable External "Buy (number)"
label variable ratio "Buy (\%, equally-weighted)"
label variable ratio_weighted "Buy (\%, entry sales-weighted)"

* Constant grouping variable so that tabstat can be called with by().
gen one = 1

eststo debut: estpost tabstat Internal External ratio ratio_weighted, ///
    statistics(mean) by(one) ///
    columns(statistics) listwise

*****************************************************************************
* STATISTIC RATIOS (ENTRIES WITH SHARE > 5%)
*****************************************************************************
* Yearly build/buy counts and ratios, counting only entries whose share is
* non-missing and above 5%.
* NOTE(review): this path is relative to the working directory, whereas the
* same file is loaded through the input_stata global elsewhere - confirm.
use "input_stata\marche_f_3.dta", clear

* share is sales divided by l1_cagr.
gen share = sales / l1_cagr
drop if entree == "NA"
gen buy = entree == "external"
gen Internal = (buy == 0) & share > 0.05 & share < .
gen External = (buy == 1) & share > 0.05 & share < .
gen sales_Internal = sales * Internal
gen sales_External = sales * External

* Yearly totals, then the two ratios in percent.
collapse (sum) Internal External sales_Internal sales_External, by(year)
gen ratio = External / (External + Internal) * 100
gen ratio_weighted = sales_External / (sales_External + sales_Internal) * 100

label variable Internal "Build (number)"
label variable External "Buy (number)"
label variable ratio "Build/buy (\%, equally-weighted)"
label variable ratio_weighted "Build/buy (\%, entry sales-weighted)"

* Table 1: counts by year.
eststo debut: estpost tabstat Internal External, ///
    by(year) statistics(sum) ///
    columns(statistics) listwise
esttab debut using "Latex\tables\ratio1_5pct.tex", main(mean) cells("sum(fmt(%15.0fc))") replace ///
    label nostar unstack nogaps noobs nonote nomtitle nonumber fragment not  mlabels(none) collabels(none) nonumber

* Table 2: ratios by year.
eststo debut: estpost tabstat ratio ratio_weighted, ///
    by(year) statistics(mean) ///
    columns(statistics) listwise
esttab debut using "Latex\tables\ratio2_5pct.tex", main(mean) cells("mean(fmt(%15.2fc))") replace ///
    label nostar unstack nogaps noobs nonote nomtitle nonumber fragment not  mlabels(none) collabels(none) nonumber

*****************************************************************************
* STATISTIC DESC TABLES DIVERSIFICATION
*****************************************************************************
* Descriptive table for the diversification regression.
* NOTE(review): this path is relative to the working directory, and the export
* below writes a file with the same name as the extensive-margin table written
* to the export folder - confirm both are intended.
use "output_stata\ext_regression.dta", clear

* Rebuild the diversification dummy: 1 when entree is neither "NA" nor empty.
capture drop diversify
gen diversify = entree != "NA" & entree != ""

* The regression only serves to flag the estimation sample.
reghdfe diversify HC_distance_s_brut_t1 ///
    vaj_eff_t1 diversity_t1 size_t1 immo_eff_t1 sal_eff_t1 tresact_eff_t1 if l1_cagr != ., absorb(orig_dest_year) cluster(sirtg_num apgr_1_num code_entry_num)
gen sample = e(sample)

* Quick look at the outcome, the distance measures and the controls.
summarize diversify ///
    HC_distance_nombre_t1 HC_distance_s_brut_t1 HC_distance_nbheur_t1 HC_distance_nombre_t0 HC_distance_secteur ///
    vaj_eff_t1 diversity_t1 size_t1 immo_eff_t1 sal_eff_t1 tresact_eff_t1 ///
    if sample

* Detailed statistics for the outcome and the distance measures only.
eststo clear
eststo summa: quietly estpost summarize ///
    diversify ///
    HC_distance_nombre_t1 HC_distance_s_brut_t1 HC_distance_nbheur_t1 HC_distance_nombre_t0 HC_distance_secteur ///
    if sample, detail

* On-screen preview, produced before the labels below are attached.
esttab summa, ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    nonumber label fragment

label variable diversify "1(Diversify)$\_{\textit{g,n,t}}$"
label variable HC_distance_secteur "HC Distance$\_{\textit{g,n,t1}}$"
label variable HC_distance_nombre_t0 "HC Distance$\_{\textit{g,n,t0}}$"
label variable HC_distance_nbheur_t1 "HC Distance$\_{\textit{g,n,t-1}}$ (hours)"
label variable HC_distance_s_brut_t1 "HC Distance$\_{\textit{g,n,t-1}}$ (wages)"
label variable HC_distance_nombre_t1 "HC Distance$\_{\textit{g,n,t-1}}$"

* LaTeX export.
esttab summa using "Latex\tables\stat_summ_ext.tex", ///
    cells("count(fmt(%15.0fc)) mean(fmt(2)) sd(fmt(2)) p5(fmt(2)) p25(fmt(2)) p50(fmt(2)) p75(fmt(2)) p95(fmt(2))") ///
    star(* 0.10 ** 0.05 *** 0.01) substitute(\_ _) ///
    nomtitle nonumber fragment booktabs label replace mlabels(none) collabels(none) noline noobs

**********************************************************************************
* STAT APPENDIX
*******************************************************************************
* Load the main regression data for the appendix tables.
clear
use output_stata\main_regression.dta

* Group identifier for the absorbed fixed effect. The main table uses
* orig_dest_year straight from this data set, so any existing copy is dropped
* first; otherwise egen would stop with an already-defined error.
capture drop orig_dest_year
egen orig_dest_year = group(code_entry apgr_1 year)

* Sample: regression observations with a share above 5%. Stata treats missing
* as larger than any number, so the missing check keeps rows with an unknown
* share out, as in the other share filters of this file.
reghdfe buy HC_distance_s_brut_t1 ///
    vaj_eff_t1 diversity_t1 size_t1 immo_eff_t1 sal_eff_t1 tresact_eff_t1 ///
    if share > 0.05 & share < ., absorb(orig_dest_year) cluster(apgr_1_num code_entry_num)
gen sample = e(sample)
keep if sample

***
* Top 10 sectors of entry
preserve
* Number of observations and number of buys per entry sector.
bys code_entry: gen nb = _N
bys code_entry: egen nb_buy = total(buy)
* One row per entry sector; nb and nb_buy are constant within sector.
duplicates drop code_entry, force
keep code_entry nb nb_buy
summ nb*, d

* Attach the sector labels, which are keyed on a string variable named Code.
rename code_entry Code
tostring Code, replace
merge 1:1 Code using "input_r\sources\nomenclatures\APElibel.dta"
keep if _merge == 3
drop _merge

* Rank by decreasing count. Code breaks ties so the ranking is reproducible
* from one run to the next.
gsort -nb Code
gen rank = _n
keep if rank <= 10
order Code rank APElib nb nb_buy
list Code rank APElib nb nb_buy
drop Code
listtab rank APElib nb nb_buy using "Latex\tables\stat_top10.tex", replace delim(" & ") end(" \\")
restore

***
* Most common sector pairs
preserve
* Number of observations and number of buys per (entry sector, origin sector) pair.
bys code_entry apgr_1: gen nb = _N
bys code_entry apgr_1: egen nb_buy = total(buy)
keep code_entry apgr_1 nb nb_buy

* One row per pair. An earlier de-duplication on code_entry alone kept a single
* origin per entry sector and so removed most pairs before the ranking; it has
* been dropped.
duplicates drop code_entry apgr_1, force
tostring code_entry apgr_1, replace

* Label of the origin sector. The label file is keyed on Code.
rename apgr_1 Code
merge m:1 Code using "input_r\sources\nomenclatures\APElibel.dta"
keep if _merge == 3
drop _merge
rename APElib APElib_orig

* Label of the entry sector.
rename Code apgr_1
rename code_entry Code
merge m:1 Code using "input_r\sources\nomenclatures\APElibel.dta"
keep if _merge == 3
drop _merge
rename APElib APElib_entry
rename Code code_entry

* Keep pairs with at least one buy and rank them by decreasing count. The
* sector codes break ties so the ranking is reproducible.
keep if nb_buy >= 1
gsort -nb apgr_1 code_entry
gen rank = _n
keep if rank <= 10
order rank APElib_orig APElib_entry nb nb_buy

list code_entry rank APElib_orig APElib_entry nb nb_buy
drop code_entry apgr_1
listtab rank APElib_orig APElib_entry nb nb_buy using "Latex\tables\stat_top10_sectors.tex", replace delim(" & ") end(" \\")
restore



